
> ## ---- Auxiliary data (MICROM etc) ----
> ## Imputed SOEP data; expected to provide the object `soep_imp` used below.
> load(file = "dat/proc-data/soep_imp.RData")

> ## PLZ5-level data; expected to provide the object `plz5_f_und_b` used below.
> load(file = "dat/proc-data/plz5_f_und_b.RData")

> ## MICROM data (2005-2018 according to the file name), read from a Stata file.
> soep_microm <- read.dta(file = "dat/proc-data/microm_2005_2018.dta")

> ## ---- Post-imputation variable generation (Part 1) ----
> ## For every imputed data set in soep_imp$imputations this loop
> ##   1. pads plz to five characters and merges the MICROM and PLZ5-level data,
> ##   2. collapses gtyp into gtyp3 (urban / suburban / rural),
> ##   3. derives the unemployment-risk measure `risk`,
> ##   4. stores the votes reported in 2014 and 2018 as person-constant columns,
> ##   5. derives the first year in the current ZIP / county (kkz) area,
> ##   6. flags the main breadwinner within each household-year,
> ##   7. adds dummy columns plus person means (_umn) and deviations from the
> ##      person mean (_cwu) for a set of covariates.
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     ## Four-character postal codes get a leading "0"
+     mutate(plz = as.character(plz)) %>%
+     mutate(plz = ifelse(nchar(plz) == 4, paste0("0", plz), plz)) %>%
+     ## MICROM data by household-year; regional identifiers are dropped before
+     ## the join so they do not duplicate columns of the imputed data
+     left_join(
+       soep_microm %>%
+         dplyr::select(-bula,-gkz,-kkz,-plz,-kr_kkz_rek,-gk_id,-pl_id,-gtyp),
+       by = c("hh_id", "year")
+     ) %>%
+     ## PLZ5-level rent measures by plz-year, with changes relative to 2005
+     left_join(
+       plz5_f_und_b %>%
+         as.data.frame() %>%
+         dplyr::select(-AGS05max, -geometry) %>%
+         distinct() %>%
+         group_by(plz) %>%
+         ## Creates rent_2005, cmr_p50_2005 and cmr_arm_2005: the 2005 value of
+         ## each plz, or NA when the plz has no 2005 row
+         mutate_at(
+           .vars = vars(rent, cmr_p50, cmr_arm),
+           .funs = list(`2005` = ~ ifelse(length(.[year == 2005]) == 0L,
+                                          NA,
+                                          .[year == 2005]))
+         ) %>%
+         ## pchg = proportional change, achg = absolute change since 2005
+         mutate(
+           rent_pchg_2005 = rent / rent_2005 - 1,
+           rent_achg_2005 = rent - rent_2005,
+           cmr_p50_pchg_2005 = cmr_p50 / cmr_p50_2005 - 1,
+           cmr_p50_achg_2005 = cmr_p50 - cmr_p50_2005,
+           cmr_arm_pchg_2005 = cmr_arm / cmr_arm_2005 - 1,
+           cmr_arm_achg_2005 = cmr_arm - cmr_arm_2005
+         ) %>%
+         ungroup(),
+       by = c("plz", "year")
+     ) %>%
+     ## Three-category regional type; labels not listed below become NA
+     mutate(
+       gtyp3 = case_when(
+         gtyp %in% c("[1] Kernstaedte>==gr.Verdraum") ~ "urban",
+         gtyp %in% c(
+           "[2] Kernstaedte<==gr.Verdraum",
+           "[9] Kernstaedte<==Verdansatz"
+         ) ~ "urban",
+         gtyp %in% c(
+           "[3] Mi-zent.=hochverd.=gr.Verdraum",
+           "[4] so.Gem.=hochverd.=gr.Verdraum",
+           "[5] Mi-zent.=verd.=gr.Verdraum",
+           "[6] so.Gem.=verd.=gr.Verdraum",
+           "[10] Mi-zent.=verd.=Verdansatz",
+           "[11] so.Gem.=verd.=Verdansatz"
+         ) ~ "suburban",
+         gtyp %in% c(
+           "[7] Mi-zent.=laendl=gr.Verdraum",
+           "[8] so.Gem.=laendl=gr.Verdraum",
+           "[12] Mi-zent.=laendl=Verdansatz",
+           "[13] so.Gem.=laendl=Verdansatz"
+         ) ~ "rural",
+         gtyp %in% c(
+           "[14] Mi-zent.=verd.=laendl.",
+           "[15] so.Gem.=verd.=laendl.",
+           "[16] Mi-zent.=laendl=laendl.",
+           "[17] so.Gem.=laendl=laendl."
+         ) ~ "rural",
+         TRUE ~ NA_character_
+       )
+     )
+   
+   ## ---- Economic Risk ----
+   ## risk = weighted share of unemployed within cells defined by
+   ## myclass4_r x age5 x fem x east x year; kept only for respondents whose
+   ## lm_part is "Active", "Unemployed" or "Atypical", NA for everyone else.
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     mutate(unemp = ifelse(lm_part == "Unemployed", 1, 0)) %>%
+     group_by(myclass4_r, age5, fem, east, year) %>%
+     mutate(risk = weighted.mean(unemp, w = weight)) %>%
+     ungroup() %>%
+     mutate(risk = ifelse(lm_part %in% c("Active", "Unemployed", "Atypical"), risk, NA))
+   
+   ## ---- Voting ----
+   ## Copy the vote recorded in the 2014 / 2018 row to all rows of a person.
+   ## Indexing with [1] returns NA_character_ when the person has no row for
+   ## that year (the subset is empty, never NULL) and the first value otherwise.
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     group_by(id) %>%
+     mutate(
+       vote2013 = as.character(vote[year == 2014])[1],
+       vote2017 = as.character(vote[year == 2018])[1]
+     ) %>%
+     ungroup()
+   
+   ## Since when in same PLZ5/AGS5 area
+   ## chg_zip_full / chg_kkz_full are recoded to 0/1 (missing counts as 0) and
+   ## cumulated within person, so each value of the running count marks one
+   ## spell. Before the first recorded change (count 0) the start is the
+   ## earliest hh_at_current_address; afterwards it is the first year of the
+   ## spell.
+   ## NOTE(review): hh_in_current_plz is initialised here but never updated;
+   ## the ZIP-based result is written to hh_in_current_zip instead. Confirm
+   ## which of the two names later code relies on.
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     mutate(
+       hh_in_current_plz = hh_at_current_address,
+       hh_in_current_kkz = hh_at_current_address,
+       chg_kkz_full = ifelse(chg_kkz_full == "[1] Yes", 1, 0),
+       chg_zip_full = ifelse(chg_zip_full == "[1] Yes", 1, 0),
+       chg_kkz_full = ifelse(is.na(chg_kkz_full), 0, chg_kkz_full),
+       chg_zip_full = ifelse(is.na(chg_zip_full), 0, chg_zip_full)
+     ) %>%
+     arrange(id, year) %>%
+     group_by(id) %>%
+     mutate_at(.vars = vars(chg_zip_full, chg_kkz_full),
+               .funs = cumsum) %>%
+     group_by(id, chg_zip_full) %>%
+     mutate(hh_in_current_zip = ifelse(chg_zip_full == 0,
+                                       min(hh_at_current_address[chg_zip_full == 0]),
+                                       min(year))) %>%
+     group_by(id, chg_kkz_full) %>%
+     mutate(hh_in_current_kkz = ifelse(chg_kkz_full == 0,
+                                       min(hh_at_current_address[chg_kkz_full == 0]),
+                                       min(year))) %>%
+     ungroup()
+   
+   ## Main breadwinner within households (by year)
+   ## TRUE for the member(s) whose prop_personal_hinc equals the household-year
+   ## maximum (ties are all flagged). The comparison must be `==`; a single `=`
+   ## would store the maximum itself instead of an indicator.
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     group_by(hh_id, year) %>%
+     mutate(main_breadwinner = prop_personal_hinc == max(prop_personal_hinc)) %>%
+     ungroup()
+   
+   ## Dummy generation and within-transformation
+   ## to_dummy() output is appended for lm_part and hh_comp; the selection
+   ## below relies on those columns being named lm_part_* / hh_comp_*.
+   ## For every selected column x, grouped by person (id):
+   ##   x_umn = person mean of x, x_cwu = x minus the person mean.
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     bind_cols(soep_imp$imputations[[m]] %>%
+                 to_dummy(lm_part, suffix = "label")) %>%
+     bind_cols(soep_imp$imputations[[m]] %>%
+                 to_dummy(hh_comp, suffix = "label")) %>%
+     group_by(id) %>%
+     mutate_at(
+       .vars = vars(
+         owner,
+         mover,
+         east,
+         starts_with("lm_part_"),
+         starts_with("hh_comp_"),
+         hh_mmb,
+         hh_prop_ecact,
+         prop_personal_hinc,
+         log_hinc_eq,
+         cold_rent_sqm,
+         cold_rent_load,
+         home_size,
+         asset_ov_ttl_t
+       ),
+       .funs = list(umn = ~ mean(.),
+                    cwu = ~ . - mean(.))
+     ) %>%
+     ungroup()
+ }

> ## ---- Post-imputation variable generation (Part 2) ----
> ## Append to_dummy() columns for edu5 and myclass4_r, then add person means
> ## (_umn) and deviations from the person mean (_cwu) for those dummies and
> ## for risk, rent and cmr_arm. Missing values are ignored when averaging;
> ## a person with only missing values gets NaN from mean(..., na.rm = TRUE).
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     bind_cols(soep_imp$imputations[[m]] %>%
+                 to_dummy(edu5, suffix = "label")) %>%
+     bind_cols(soep_imp$imputations[[m]] %>%
+                 to_dummy(myclass4_r, suffix = "label")) %>%
+     group_by(id) %>%
+     mutate_at(
+       .vars = vars(
+         starts_with("edu5_"),
+         starts_with("myclass4_r_"),
+         risk,
+         rent,
+         cmr_arm
+       ),
+       ## TRUE is spelled out because the shorthand T can be reassigned
+       .funs = list(
+         umn = ~ mean(., na.rm = TRUE),
+         cwu = ~ . - mean(., na.rm = TRUE)
+       )
+     ) %>%
+     ungroup()
+ }

> ## Person means (_umn) and deviations from the person mean (_cwu) for
> ## cold_rent_sqm and cold_rent_load, ignoring missing values. Columns with
> ## the same names that already exist are overwritten.
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     group_by(id) %>%
+     mutate_at(
+       .vars = vars(cold_rent_sqm,
+                    cold_rent_load),
+       ## TRUE is spelled out because the shorthand T can be reassigned
+       .funs = list(
+         umn = ~ mean(., na.rm = TRUE),
+         cwu = ~ . - mean(., na.rm = TRUE)
+       )
+     ) %>%
+     ungroup()
+ }

> ## Remove every column whose name contains a doubled transformation suffix
> ## (presumably created when an already transformed column was picked up by
> ## a starts_with() selection and transformed again -- TODO confirm).
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- dplyr::select(
+     soep_imp$imputations[[m]],
+     -contains("_umn_umn"),
+     -contains("_umn_cwu"),
+     -contains("_cwu_umn"),
+     -contains("_cwu_cwu")
+   )
+ }

> ## Replace NaN by NA in every column that contains at least one NaN.
> ## vapply() guarantees a logical index vector; sapply() would return list()
> ## for a data set without columns and break the name subsetting.
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     mutate_at(
+       .vars = vars(all_of(names(soep_imp$imputations[[m]])[
+         vapply(soep_imp$imputations[[m]],
+                function(x) any(is.nan(x)),
+                logical(1))
+       ])),
+       .funs = ~ ifelse(is.nan(.), NA, .)
+     )
+ }

> ## Person mean (age_umn) and deviation from the person mean (age_cwu) of
> ## age, ignoring missing values.
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     group_by(id) %>%
+     mutate_at(.vars = vars(age),
+               ## TRUE is spelled out because the shorthand T can be reassigned
+               .funs = list(
+                 umn = ~ mean(., na.rm = TRUE),
+                 cwu = ~ . - mean(., na.rm = TRUE)
+               )) %>%
+     ungroup()
+ }

> ## ---- Post-imputation variable generation (Part 3) ----
> ## Numeric 0/1 indicators for each partyid label, plus AfD and Green vote
> ## indicators that are filled only in the 2014 rows (from vote2013) and the
> ## 2018 rows (from vote2017); all other years are NA.
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- mutate(
+     soep_imp$imputations[[m]],
+     ## Party identification
+     afd = as.numeric(partyid == "AfD"),
+     cdu = as.numeric(partyid == "CDU/CSU"),
+     fdp = as.numeric(partyid == "FDP"),
+     gre = as.numeric(partyid == "Green"),
+     lef = as.numeric(partyid == "Left"),
+     non = as.numeric(partyid == "None"),
+     oth = as.numeric(partyid == "Others"),
+     spd = as.numeric(partyid == "SPD"),
+     ## Reported vote, attached to the survey year it was recorded in
+     vote_afd = case_when(
+       year == 2014 ~ as.numeric(vote2013 == "AfD"),
+       year == 2018 ~ as.numeric(vote2017 == "AfD"),
+       TRUE ~ NA_real_
+     ),
+     vote_gre = case_when(
+       year == 2014 ~ as.numeric(vote2013 == "Green"),
+       year == 2018 ~ as.numeric(vote2017 == "Green"),
+       TRUE ~ NA_real_
+     )
+   )
+ }

> ## first_year = earliest year in which each person (id) appears in the data
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- ungroup(
+     mutate(
+       group_by(soep_imp$imputations[[m]], id),
+       first_year = min(year)
+     )
+   )
+ }

> ## year_fac = year stored as a factor
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- mutate(
+     soep_imp$imputations[[m]],
+     year_fac = as.factor(year)
+   )
+ }

> ## Save
> ## Writes soep_imp back to the same file it was loaded from, replacing the
> ## earlier version on disk.
> save(list = "soep_imp", file = "dat/proc-data/soep_imp.RData")
